%%%% data for plots
% Loads the observed donations and the simulated donations, then rounds
% both to whole amounts.
clc;
clear all;


%% Load inputs
% The .mat file is expected to supply the struct simd that is read below.
load('simulated_data');
% Read the CSV starting at row offset 1, column offset 0 (first row skipped).
data=csvread('data_section4.csv',1,0);

donation=data(:,2);   % second CSV column
default=data(:,1);    % first CSV column

total_obs=length(donation);

%% Round to whole amounts
% Amounts strictly between 0 and 1 are lifted to 1 before rounding, so a
% positive donation can never be rounded down to 0.
is_fractional=(donation>0 & donation<1);
donation(is_fractional)=1;
donation=round(donation);

sim_donation10=round(simd.sim_donation10);
sim_donation20=round(simd.sim_donation20);
sim_donation50=round(simd.sim_donation50);

%%
% Number of zero-donation rows to append for each default level d:
%   round( rate_sample * (#rows with default==d)
%          - (#rows with default==d and donation>0) )
% The assignments are deliberately left without semicolons so the values are
% echoed to the command window.
% NOTE(review): the counts are taken before the donation<=300 filter that is
% applied further down - confirm this is intended.
rate_sample=0.035
zeros_needed=@(d) round(nnz(default==d)*rate_sample - nnz(donation>0 & default==d));
nzeros_ad=zeros_needed(0)
nzeros_10=zeros_needed(10)
nzeros_20=zeros_needed(20)
nzeros_50=zeros_needed(50)

%%
% Keep only strictly positive donations of at most 300.
subsample=(donation>0 & donation<=300);
donation=donation(subsample);
default=default(subsample);

%%
% Append the zero-donation rows, one default level after another
% (0, 10, 20, 50). Each appended row has donation 0 and carries the default
% level of its group.
pad_default=[0, 10, 20, 50];
pad_count=[nzeros_ad, nzeros_10, nzeros_20, nzeros_50];
for k=1:numel(pad_default)
    donation=[donation; zeros(pad_count(k),1)];
    default=[default; pad_default(k)*ones(pad_count(k),1)];
end

%% to ensure data matrix has same dimensions: add missing values
% Every column is padded with NaN up to the length of the longest one, so the
% horizontal concatenation that follows always gets equally sized columns.
% Each vector is padded by its own shortfall; the pad length is not derived
% from sim_donation10 alone, so simulated vectors of different lengths are
% handled as well. When all simulated vectors already share one length the
% result is the same as padding by the donation/sim_donation10 difference.
a=length(donation);
b=length(sim_donation10)   % left unsuppressed: echoes the simulated sample size
n_rows=max([a, length(default), b, length(sim_donation20), length(sim_donation50)]);

% NaN(0,1) is empty, so vectors that are already n_rows long are unchanged.
donation=vertcat(donation, NaN(n_rows-length(donation),1));
default=vertcat(default, NaN(n_rows-length(default),1));
sim_donation10=vertcat(sim_donation10, NaN(n_rows-length(sim_donation10),1));
sim_donation20=vertcat(sim_donation20, NaN(n_rows-length(sim_donation20),1));
sim_donation50=vertcat(sim_donation50, NaN(n_rows-length(sim_donation50),1));
    


%%
% Write the combined table, one column per series in this order:
% donation, default, sim_donation10, sim_donation20, sim_donation50.
savedat= [donation, default, sim_donation10, sim_donation20, sim_donation50];
% csvwrite emits at most five significant digits, which silently truncates
% larger or more precise values. dlmwrite with an explicit precision keeps the
% same comma-separated layout and writes values in full; output is unchanged
% for values that already fit in five significant digits.
dlmwrite('datavssimulation.csv',savedat,'delimiter',',','precision','%.15g');